// Start from an empty session: remove the data in memory and all Stata matrices.
clear
clear matrix
// Raise the resource limits. Every -set- runs under -capture-, so a setting
// that this Stata version or flavor rejects does not stop the script.
foreach limit in "memory 10g" "maxvar 32767" "matsize 11000" {
    capture set `limit'
}

// ******************************
// PART I: LOAD ALL DATA
// ******************************

// ******************************
// LOAD I-O TABLES
// This dataset is the master that the merges further down attach to.

use data/vdfa.dta, clear

// Remove regions flagged as not being actual countries.
drop if isNotReallyACountry==1
// Sector 36 is dwellings, which is not really a proper concept here
// (the alternative would be to count it as real estate). Remove it on
// either side of the upstream/downstream pair.
drop if upstream==36 | downstream==36

// ******************************
// LOAD IMPORTS
// One-to-one merge on the (countrycode, upstream, downstream) cell.

merge 1:1 countrycode upstream downstream using data/vifa.dta
// Remove dwellings (sector 36) before checking the match, so that such rows
// in the using file do not trip the assertion below.
drop if upstream==36 | downstream==36
// Every remaining row must exist in both files.
assert _merge==3
drop _merge

// ******************************
// LOAD GROSS OUTPUT (AFTER TAXES)
// Many-to-one merge: the using file is keyed by (downstream, countrycode),
// and its values are attached to every master row with that key.

merge m:1 downstream countrycode using data/voa.dta
// Remove dwellings (sector 36) first, then require a full match.
keep if downstream!=36
assert _merge==3
drop _merge

// ******************************
// LOAD COURT DATA AND LITIGIOSITY MEASURES
// The using file is keyed by sector pair only, so the same values are
// attached to a given (upstream, downstream) pair in every country.

merge m:1 upstream downstream using data/litigation.dta, ///
    keepusing(numberOfCases numberOfCases_noNames ///
              numberOfFirms_row numberOfFirms_col)
// Master pairs without litigation data are tolerated; pairs that exist only
// in the using file are not.
assert _merge!=2
drop _merge
// The _row count is stored as the upstream figure, the _col count as the
// downstream figure.
rename numberOfFirms_col numberOfFirmsOrbisDownstream
rename numberOfFirms_row numberOfFirmsOrbisUpstream

// ******************************
// LOAD RAUCH MEASURE
// The using file is keyed by gtap, so each side of the sector pair is
// temporarily renamed to gtap for its own merge.

// --- upstream sector ---
rename upstream gtap
merge m:1 gtap using data/rauch_by_myagg.dta
drop if gtap==36 // dwellings
assert _merge!=2
drop _merge
rename gtap upstream
// Park the upstream values under temporary names so that the second merge
// can bring in con and lib again.
rename con con_up
rename lib lib_up

// --- downstream sector ---
rename downstream gtap
merge m:1 gtap using data/rauch_by_myagg.dta
drop if gtap==36 // dwellings
assert _merge!=2
drop _merge
rename gtap downstream
rename con con_down
rename lib lib_down

// Final naming: con and lib hold the upstream values, con_down and lib_down
// hold the downstream values.
rename con_up con
rename lib_up lib

// ******************************
// LOAD NUMBER OF FIRMS BY SECTOR (SUSB)
// The same gtap-keyed file is merged twice, first for the downstream sector
// and then for the upstream sector. Each pass must match every row, and its
// firmsbymyagg column is stored as susbFirmsDownstream / susbFirmsUpstream.
foreach side in Downstream Upstream {
    // name of the sector variable for this pass (downstream / upstream)
    local key = lower("`side'")
    rename `key' gtap
    merge m:1 gtap using data/susbfirms_by_myagg.dta
    assert _merge==3
    drop _merge
    rename gtap `key'
    rename firmsbymyagg susbFirms`side'
}

// ******************************
// LOAD ENFORCEMENT COSTS
// Build a one-row-per-country extract of the Doing Business data, then
// attach it to the main dataset by countrycode.
//
// The main dataset is set aside with -preserve- and the extract is written
// to a -tempfile-. This replaces fixed-name scratch files in the working
// directory, which could overwrite an existing file of the same name and
// were left behind whenever the script stopped part-way through.
// NOTE: the tempfile name lives in a local macro, so this section has to be
// executed in one go (not line by line).
preserve
use data/doingbusiness.dta, clear
// only years that actually report the cost-of-claim measure are candidates
drop if enforcingcontractscostofclaim==.
// pick the one that is closest to 2005; on a tie the earlier year is kept
gen distancefrom2005=abs(2005-year)
bysort countrycode (distancefrom2005 year): keep if _n==1
keep countrycode enforcingcontractstimedays enforcingcontractscostofclaim enforcingcontractsproceduresnumb enforcingcontractsrank
tempfile enforcement
save `enforcement'
restore

merge m:1 countrycode using `enforcement'
// countries that appear only in the Doing Business extract are not needed,
// but every country in the main dataset must have enforcement data
drop if _merge==2
assert _merge==3
drop _merge

// ******************************
// FINANCIAL DEVELOPMENT by country (private credit/GDP, from WDI)
// Romania is switched from ROU to ROM for the duration of the merge and back
// afterwards (presumably the using file uses the ROM code -- confirm).
replace countrycode_string="ROM" if countrycode_string=="ROU"
merge m:1 countrycode_string using data/findev2000.dta
// countries found only in the using file are discarded; master rows without
// a match are kept
keep if _merge!=2
drop _merge
replace countrycode_string="ROU" if countrycode_string=="ROM"


// ******************************
// JAPANESE I-O LINKAGES
// Attached by sector pair, so the same values are repeated across countries.
merge m:1 upstream downstream using data/linkages_myagg.dta
// Discard sector pairs that exist only in the using file. Otherwise they
// would be appended as rows without any country information. Master rows
// without a match are kept.
drop if _merge==2
drop _merge

// ******************************
// JAPANESE NUMBER OF FIRMS
// The using file is keyed by myagg. It is merged once per side of the sector
// pair (upstream first, then downstream), and its firms_japan column is
// stored as firms_japan_upstream / firms_japan_downstream.
foreach side in upstream downstream {
    rename `side' myagg
    merge m:1 myagg using "data/japan_firms_2004-myagg.dta"
    // Discard sectors that exist only in the using file. Otherwise they
    // would be appended as rows without country or partner-sector
    // information. Master rows without a match are kept.
    drop if _merge==2
    drop _merge
    rename myagg `side'
    rename firms_japan firms_japan_`side'
}

// ******************************
// GDPC/CAPITA
// Only gdpc_ppp is taken from the using file; countries found only there
// are discarded.
merge m:1 countrycode using "data/WorldDevIndicators/gdpc_ppp_withcountrycode.dta", keepusing(gdpc_ppp)
keep if _merge!=2
drop _merge
// Hard-coded values for two countries. The source of these figures is not
// recorded in this file.
replace gdpc_ppp=639.65015 if countrycode_string=="MWI"
replace gdpc_ppp=27252.01 if countrycode_string=="TWN" //World Bank does not 'recognize' Taiwan...
// natural log of GDP per capita
generate loggdpc = ln(gdpc_ppp)

// ******************************
// CAPITAL AND R&D INTENSITY
// Both using files are keyed by myagg and are attached to the downstream
// sector, so downstream is renamed once for the two merges and renamed back
// at the end.
rename downstream myagg

merge m:1 myagg using "data/cap_intensity/cap_myagg.dta"
// Discard sectors that exist only in the using file. Otherwise they would be
// appended as rows without country or upstream information. Master rows
// without a match are kept.
drop if _merge==2
drop _merge

merge m:1 myagg using "data/rnd_intensity/rnd_myagg.dta"
drop if _merge==2
drop _merge

rename myagg downstream

// ******************************
// MISC

// Add country names. The lookup file is keyed by iso31661countrychar3code,
// so countrycode_string is renamed for the merge and restored afterwards.
rename countrycode_string iso31661countrychar3code
merge m:1 iso31661countrychar3code using data/countrycodes.dta
// every country in the data must be found in the lookup; lookup entries that
// are not in the data are discarded
assert _merge!=1
keep if _merge!=2
drop _merge
rename iso31661countrychar3code countrycode_string
rename iso31661countryname countryname
// remove the lookup columns that are not needed
drop countryalternatenames iso31661countrychar2code iso31661countrynumbercode ///
     fipscountrycode fipscountryname unregion unsubregionname cdhid ///
     comments gpslatitude gpslongitude

// Use the country names as value labels of countrycode.
// NOTE(review): labmask is not an official Stata command (it is distributed
// in the labutil package on SSC) -- confirm it is installed.
labmask countrycode, values(countryname)

